# Alex Gazmararian
# agazmararian@gmail.com

library(tidyverse)
library(tidylog)
library(readxl)
library(here)
library(modelsummary)

n_rows <- length(read_lines(here("data", "input", "bea", "CAGDP9", "CAGDP9__ALL_AREAS_2001_2023.csv")))
# Load all but last 5 rows which are a note
gdp <- read_csv(
  here("data", "input", "bea", "CAGDP9", "CAGDP9__ALL_AREAS_2001_2023.csv"), 
  n_max = n_rows - 5,
  na = c("", "NA", "(NA)", "(D)")
  )

gdp <- gdp %>%
  rename(fips = GeoFIPS) %>%
  mutate(fips = trimws(fips)) %>%
  filter(!substr(fips, 3, 5) == "000")

gdp$Description <- trimws(gdp$Description)
gdp <- subset(gdp, Description %in% c("All industry total", "Manufacturing"))

gdp <- gdp %>%
     pivot_longer(cols = `2001`:`2023`, names_to = "year", values_to = "gdp") %>%
     mutate(year = as.numeric(year))

gdp <- gdp %>%
    mutate(Description = ifelse(Description == "All industry total", "all", "man"))

gdp <- subset(gdp, select = c(fips, year, Description, gdp))

gdp <- gdp %>%
  pivot_wider(names_from = Description, values_from = gdp) %>%
  rename(
    gdp = all,
    gdp_man = man
  )

gdp$fips <- as.numeric(gdp$fips)

# No coverage for Alaska and 
gdp$gdp <- as.numeric(gdp$gdp)
gdp$gdp_man <- as.numeric(gdp$gdp_man)
gdp$gdp_ln <- log(gdp$gdp)
gdp$man_share <- gdp$gdp_man / gdp$gdp
gdp <- rename(gdp, fips.bea = fips)

# Load other economic data
n_rows_econ <- length(read_lines(here("data", "input", "bea", "CAINC30", "CAINC30__ALL_AREAS_1969_2023.csv")))
econ <- read_csv(
  here("data", "input", "bea", "CAINC30", "CAINC30__ALL_AREAS_1969_2023.csv"),
  n_max = n_rows_econ - 5,
  na = c("", "NA", "(NA)", "(D)")
)

econ$Description <- trimws(econ$Description)
econ <- filter(econ, Description %in% c("Per capita net earnings 4/"))
econ <- econ %>%
     pivot_longer(cols = `1969`:`2023`, names_to = "year", values_to = "income_pc") %>%
     mutate(year = as.numeric(year))

econ <- econ %>%
  rename(
    fips.bea = GeoFIPS,
    income_pc = income_pc
  )

econ <- econ %>%
  mutate(fips.bea = trimws(fips.bea)) %>%
  filter(!substr(fips.bea, 3, 5) == "000") %>%
  mutate(fips.bea = as.numeric(fips.bea))

econ$income_pc <- as.numeric(econ$income_pc)

gdp <- subset(gdp, select = c(fips.bea, year, gdp, gdp_man, gdp_ln, man_share))
econ <- subset(econ, select = c(fips.bea, year, income_pc))

g.out <- full_join(gdp, econ, by = c("fips.bea", "year"))

# https://researchrepository.wvu.edu/cgi/viewcontent.cgi?article=1000&context=rri_tech_docs
# Apply FIPS splits for Virginia combined independent cities/counties
# Keep original combined FIPS rows and add new split rows with separate columns
# fips.bea: original BEA FIPS (combined for split counties)  
# fips.census: individual census FIPS codes

# Initialize fips.census column 
# For non-Virginia counties: fips.census = fips.bea
# For Virginia combined counties: fips.census = NA (will be set in split rows)
combined_fips <- c(51939, 51901, 51903, 51907, 51911, 51913, 51918, 51919, 
                   51921, 51923, 51929, 51931, 51933, 51941, 51942, 51944, 
                   51945, 51947, 51949, 51951, 51953, 51955, 51958)

g.out <- g.out %>%
  mutate(fips.census = ifelse(fips.bea %in% combined_fips, NA, fips.bea))

# Split 51939 into Pittsylvania (51143), Danville (51590)
if (any(g.out$fips.bea == 51939)) {
  orig_data <- g.out %>% filter(fips.bea == 51939)
  split_51939 <- bind_rows(
    orig_data %>% mutate(fips.census = 51143),
    orig_data %>% mutate(fips.census = 51590)
  )
  g.out <- bind_rows(g.out, split_51939)
}

# Split 51901 into Albemarle (51003), Charlottesville (51540)
if (any(g.out$fips.bea == 51901)) {
  orig_data <- g.out %>% filter(fips.bea == 51901)
  split_51901 <- bind_rows(
    orig_data %>% mutate(fips.census = 51003),
    orig_data %>% mutate(fips.census = 51540)
  )
  g.out <- bind_rows(g.out, split_51901)
}

# Split 51903 into Alleghany (51005), Covington (51580)
if (any(g.out$fips.bea == 51903)) {
  orig_data <- g.out %>% filter(fips.bea == 51903)
  split_51903 <- bind_rows(
    orig_data %>% mutate(fips.census = 51005),
    orig_data %>% mutate(fips.census = 51580)
  )
  g.out <- bind_rows(g.out, split_51903)
}

# Split 51907 into Augusta (51015), Staunton (51790), Waynesboro (51820)
if (any(g.out$fips.bea == 51907)) {
  orig_data <- g.out %>% filter(fips.bea == 51907)
  split_51907 <- bind_rows(
    orig_data %>% mutate(fips.census = 51015),
    orig_data %>% mutate(fips.census = 51790),
    orig_data %>% mutate(fips.census = 51820)
  )
  g.out <- bind_rows(g.out, split_51907)
}

# Split 51911 into Campbell (51031), Lynchburg (51680)
if (any(g.out$fips.bea == 51911)) {
  orig_data <- g.out %>% filter(fips.bea == 51911)
  split_51911 <- bind_rows(
    orig_data %>% mutate(fips.census = 51031),
    orig_data %>% mutate(fips.census = 51680)
  )
  g.out <- bind_rows(g.out, split_51911)
}

# Split 51913 into Carroll (51035), Galax (51640)
if (any(g.out$fips.bea == 51913)) {
  orig_data <- g.out %>% filter(fips.bea == 51913)
  split_51913 <- bind_rows(
    orig_data %>% mutate(fips.census = 51035),
    orig_data %>% mutate(fips.census = 51640)
  )
  g.out <- bind_rows(g.out, split_51913)
}

# Split 51918 into Dinwiddie (51053), Colonial Heights (51570), Petersburg (51730)
if (any(g.out$fips.bea == 51918)) {
  orig_data <- g.out %>% filter(fips.bea == 51918)
  split_51918 <- bind_rows(
    orig_data %>% mutate(fips.census = 51053),
    orig_data %>% mutate(fips.census = 51570),
    orig_data %>% mutate(fips.census = 51730)
  )
  g.out <- bind_rows(g.out, split_51918)
}

# Split 51919 into Fairfax (51059), Fairfax City (51600), Falls Church (51610)
if (any(g.out$fips.bea == 51919)) {
  orig_data <- g.out %>% filter(fips.bea == 51919)
  split_51919 <- bind_rows(
    orig_data %>% mutate(fips.census = 51059),
    orig_data %>% mutate(fips.census = 51600),
    orig_data %>% mutate(fips.census = 51610)
  )
  g.out <- bind_rows(g.out, split_51919)
}

# Split 51921 into Frederick (51069), Winchester (51840)
if (any(g.out$fips.bea == 51921)) {
  orig_data <- g.out %>% filter(fips.bea == 51921)
  split_51921 <- bind_rows(
    orig_data %>% mutate(fips.census = 51069),
    orig_data %>% mutate(fips.census = 51840)
  )
  g.out <- bind_rows(g.out, split_51921)
}

# Split 51923 into Greensville (51081), Emporia (51595)
if (any(g.out$fips.bea == 51923)) {
  orig_data <- g.out %>% filter(fips.bea == 51923)
  split_51923 <- bind_rows(
    orig_data %>% mutate(fips.census = 51081),
    orig_data %>% mutate(fips.census = 51595)
  )
  g.out <- bind_rows(g.out, split_51923)
}

# Split 51929 into Henry (51089), Martinsville (51690)
if (any(g.out$fips.bea == 51929)) {
  orig_data <- g.out %>% filter(fips.bea == 51929)
  split_51929 <- bind_rows(
    orig_data %>% mutate(fips.census = 51089),
    orig_data %>% mutate(fips.census = 51690)
  )
  g.out <- bind_rows(g.out, split_51929)
}

# Split 51931 into James City (51095), Williamsburg (51830)
if (any(g.out$fips.bea == 51931)) {
  orig_data <- g.out %>% filter(fips.bea == 51931)
  split_51931 <- bind_rows(
    orig_data %>% mutate(fips.census = 51095),
    orig_data %>% mutate(fips.census = 51830)
  )
  g.out <- bind_rows(g.out, split_51931)
}

# Split 51933 into Montgomery (51121), Radford (51750)
if (any(g.out$fips.bea == 51933)) {
  orig_data <- g.out %>% filter(fips.bea == 51933)
  split_51933 <- bind_rows(
    orig_data %>% mutate(fips.census = 51121),
    orig_data %>% mutate(fips.census = 51750)
  )
  g.out <- bind_rows(g.out, split_51933)
}

# Split 51941 into Prince George (51149), Hopewell (51670)
if (any(g.out$fips.bea == 51941)) {
  orig_data <- g.out %>% filter(fips.bea == 51941)
  split_51941 <- bind_rows(
    orig_data %>% mutate(fips.census = 51149),
    orig_data %>% mutate(fips.census = 51670)
  )
  g.out <- bind_rows(g.out, split_51941)
}

# Split 51942 into Prince William (51153), Manassas (51683), Manassas Park (51685)
if (any(g.out$fips.bea == 51942)) {
  orig_data <- g.out %>% filter(fips.bea == 51942)
  split_51942 <- bind_rows(
    orig_data %>% mutate(fips.census = 51153),
    orig_data %>% mutate(fips.census = 51683),
    orig_data %>% mutate(fips.census = 51685)
  )
  g.out <- bind_rows(g.out, split_51942)
}

# Split 51944 into Roanoke (51161), Salem (51775)
if (any(g.out$fips.bea == 51944)) {
  orig_data <- g.out %>% filter(fips.bea == 51944)
  split_51944 <- bind_rows(
    orig_data %>% mutate(fips.census = 51161),
    orig_data %>% mutate(fips.census = 51775)
  )
  g.out <- bind_rows(g.out, split_51944)
}

# Split 51945 into Rockbridge (51163), Buena Vista (51530), Lexington (51678)
if (any(g.out$fips.bea == 51945)) {
  orig_data <- g.out %>% filter(fips.bea == 51945)
  split_51945 <- bind_rows(
    orig_data %>% mutate(fips.census = 51163),
    orig_data %>% mutate(fips.census = 51530),
    orig_data %>% mutate(fips.census = 51678)
  )
  g.out <- bind_rows(g.out, split_51945)
}

# Split 51947 into Rockingham (51165), Harrisonburg (51660)
if (any(g.out$fips.bea == 51947)) {
  orig_data <- g.out %>% filter(fips.bea == 51947)
  split_51947 <- bind_rows(
    orig_data %>% mutate(fips.census = 51165),
    orig_data %>% mutate(fips.census = 51660)
  )
  g.out <- bind_rows(g.out, split_51947)
}

# Split 51949 into Southampton (51175), Franklin (51620)
if (any(g.out$fips.bea == 51949)) {
  orig_data <- g.out %>% filter(fips.bea == 51949)
  split_51949 <- bind_rows(
    orig_data %>% mutate(fips.census = 51175),
    orig_data %>% mutate(fips.census = 51620)
  )
  g.out <- bind_rows(g.out, split_51949)
}

# Split 51951 into Spotsylvania (51177), Fredericksburg (51630)
if (any(g.out$fips.bea == 51951)) {
  orig_data <- g.out %>% filter(fips.bea == 51951)
  split_51951 <- bind_rows(
    orig_data %>% mutate(fips.census = 51177),
    orig_data %>% mutate(fips.census = 51630)
  )
  g.out <- bind_rows(g.out, split_51951)
}

# Split 51953 into Washington (51191), Bristol (51520)
if (any(g.out$fips.bea == 51953)) {
  orig_data <- g.out %>% filter(fips.bea == 51953)
  split_51953 <- bind_rows(
    orig_data %>% mutate(fips.census = 51191),
    orig_data %>% mutate(fips.census = 51520)
  )
  g.out <- bind_rows(g.out, split_51953)
}

# Split 51955 into Wise (51195), Norton (51720)
if (any(g.out$fips.bea == 51955)) {
  orig_data <- g.out %>% filter(fips.bea == 51955)
  split_51955 <- bind_rows(
    orig_data %>% mutate(fips.census = 51195),
    orig_data %>% mutate(fips.census = 51720)
  )
  g.out <- bind_rows(g.out, split_51955)
}

# Split 51958 into York (51199), Poquoson (51735)
if (any(g.out$fips.bea == 51958)) {
  orig_data <- g.out %>% filter(fips.bea == 51958)
  split_51958 <- bind_rows(
    orig_data %>% mutate(fips.census = 51199),
    orig_data %>% mutate(fips.census = 51735)
  )
  g.out <- bind_rows(g.out, split_51958)
}

# Lag one year
g.out$year_lag <- g.out$year + 1

write_csv(g.out, here("data", "inter", "bea_county_processed.csv"))
message("Saved BEA data")